{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "initial_id",
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 2、统计全校总分前10的学生的分数【学号，姓名，班级，总分】"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb11a80c7a4b7b89",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1、读取数据\n",
    "import pandas as pd\n",
    "\n",
    "from python.day3.demo10_关联 import students\n",
    "\n",
    "scores = pd.read_csv(\n",
    "    \"../../data/score.txt\",\n",
    "    sep=\",\",\n",
    "    header=None,\n",
    "    names=[\"id\", \"cid\", \"score\"],\n",
    "    encoding=\"utf-8\",\n",
    ")\n",
    "scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d2116aa3e3270d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2、统计总分\n",
    "sum_score = scores.groupby(\"id\")[\"score\"].sum()\n",
    "sum_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c96404298957388b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 按照总分排序，取前十\n",
    "# ascending: 指定排序方式，默认是升序\n",
    "top10_sum_score = sum_score.sort_values(ascending=False).head(10).reset_index()\n",
    "top10_sum_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55fe6cb142d52240",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 读取学生表\n",
    "students = pd.read_csv(\n",
    "    \"../../data/student.txt\",\n",
    "    sep=\",\",\n",
    "    header=None,\n",
    "    names=[\"id\", \"name\", \"age\", \"sex\", \"clazz\"],\n",
    "    encoding=\"utf-8\"\n",
    ")\n",
    "students"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "187c7aa63f8f9976",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-08-22T09:13:46.219329Z",
     "start_time": "2024-08-22T09:13:46.203437Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>clazz</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1500100929</td>\n",
       "      <td>满慕易</td>\n",
       "      <td>理科三班</td>\n",
       "      <td>630</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1500100080</td>\n",
       "      <td>巫景彰</td>\n",
       "      <td>理科五班</td>\n",
       "      <td>628</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1500100308</td>\n",
       "      <td>黄初夏</td>\n",
       "      <td>文科一班</td>\n",
       "      <td>628</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1500100873</td>\n",
       "      <td>路鸿志</td>\n",
       "      <td>文科四班</td>\n",
       "      <td>612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1500100418</td>\n",
       "      <td>蓟海昌</td>\n",
       "      <td>文科二班</td>\n",
       "      <td>611</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1500100258</td>\n",
       "      <td>湛昌勋</td>\n",
       "      <td>文科四班</td>\n",
       "      <td>604</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1500100875</td>\n",
       "      <td>马向南</td>\n",
       "      <td>文科一班</td>\n",
       "      <td>595</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1500100930</td>\n",
       "      <td>闻运凯</td>\n",
       "      <td>文科五班</td>\n",
       "      <td>589</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1500100596</td>\n",
       "      <td>田晨潍</td>\n",
       "      <td>理科六班</td>\n",
       "      <td>587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1500100834</td>\n",
       "      <td>谷念薇</td>\n",
       "      <td>理科二班</td>\n",
       "      <td>586</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           id name clazz  score\n",
       "0  1500100929  满慕易  理科三班    630\n",
       "1  1500100080  巫景彰  理科五班    628\n",
       "2  1500100308  黄初夏  文科一班    628\n",
       "3  1500100873  路鸿志  文科四班    612\n",
       "4  1500100418  蓟海昌  文科二班    611\n",
       "5  1500100258  湛昌勋  文科四班    604\n",
       "6  1500100875  马向南  文科一班    595\n",
       "7  1500100930  闻运凯  文科五班    589\n",
       "8  1500100596  田晨潍  理科六班    587\n",
       "9  1500100834  谷念薇  理科二班    586"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 关联获取姓名和班级\n",
    "result = top10_sum_score.merge(students, on=\"id\")[[\"id\", \"name\", \"clazz\", \"score\"]]\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "78327f8d204c638b",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-08-22T09:15:08.694771Z",
     "start_time": "2024-08-22T09:15:08.679289Z"
    }
   },
   "outputs": [],
   "source": [
    "# 将结果保存安东文件中\n",
    "result.to_csv(\"../../data/top10_sum_score.txt\", sep=\"\\t\", index=False, header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "3fe84c1963fc76e9",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-08-23T01:35:56.015138Z",
     "start_time": "2024-08-23T01:35:55.999527Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>clazz</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>施笑槐</td>\n",
       "      <td>22</td>\n",
       "      <td>女</td>\n",
       "      <td>文科六班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1500100002</td>\n",
       "      <td>吕金鹏</td>\n",
       "      <td>24</td>\n",
       "      <td>男</td>\n",
       "      <td>文科六班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1500100007</td>\n",
       "      <td>尚孤风</td>\n",
       "      <td>23</td>\n",
       "      <td>女</td>\n",
       "      <td>文科六班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>1500100013</td>\n",
       "      <td>逯君昊</td>\n",
       "      <td>24</td>\n",
       "      <td>男</td>\n",
       "      <td>文科二班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1500100016</td>\n",
       "      <td>潘访烟</td>\n",
       "      <td>23</td>\n",
       "      <td>女</td>\n",
       "      <td>文科一班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>985</th>\n",
       "      <td>1500100986</td>\n",
       "      <td>左天曼</td>\n",
       "      <td>22</td>\n",
       "      <td>女</td>\n",
       "      <td>文科一班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>986</th>\n",
       "      <td>1500100987</td>\n",
       "      <td>双昆杰</td>\n",
       "      <td>24</td>\n",
       "      <td>男</td>\n",
       "      <td>文科四班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>987</th>\n",
       "      <td>1500100988</td>\n",
       "      <td>余鸿云</td>\n",
       "      <td>22</td>\n",
       "      <td>男</td>\n",
       "      <td>文科六班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>1500100996</td>\n",
       "      <td>厉运凡</td>\n",
       "      <td>24</td>\n",
       "      <td>男</td>\n",
       "      <td>文科三班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>1500100999</td>\n",
       "      <td>钟绮晴</td>\n",
       "      <td>23</td>\n",
       "      <td>女</td>\n",
       "      <td>文科五班</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>522 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             id name  age sex clazz\n",
       "0    1500100001  施笑槐   22   女  文科六班\n",
       "1    1500100002  吕金鹏   24   男  文科六班\n",
       "6    1500100007  尚孤风   23   女  文科六班\n",
       "12   1500100013  逯君昊   24   男  文科二班\n",
       "15   1500100016  潘访烟   23   女  文科一班\n",
       "..          ...  ...  ...  ..   ...\n",
       "985  1500100986  左天曼   22   女  文科一班\n",
       "986  1500100987  双昆杰   24   男  文科四班\n",
       "987  1500100988  余鸿云   22   男  文科六班\n",
       "995  1500100996  厉运凡   24   男  文科三班\n",
       "998  1500100999  钟绮晴   23   女  文科五班\n",
       "\n",
       "[522 rows x 5 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 1、统计文科班级每个班级语文的平均分【班级，平均分】\n",
    "\n",
    "# 取出文科学生\n",
    "student_wk = students[students[\"clazz\"].str[0:2] == \"文科\"]\n",
    "student_wk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "a0531e8f3183715f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-08-23T01:38:46.920180Z",
     "start_time": "2024-08-23T01:38:46.889035Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cid</th>\n",
       "      <th>cname</th>\n",
       "      <th>max_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1000001</td>\n",
       "      <td>语文</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1000002</td>\n",
       "      <td>数学</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1000003</td>\n",
       "      <td>英语</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1000004</td>\n",
       "      <td>政治</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1000005</td>\n",
       "      <td>历史</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1000006</td>\n",
       "      <td>物理</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1000007</td>\n",
       "      <td>化学</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1000008</td>\n",
       "      <td>地理</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1000009</td>\n",
       "      <td>生物</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       cid cname  max_score\n",
       "0  1000001    语文        150\n",
       "1  1000002    数学        150\n",
       "2  1000003    英语        150\n",
       "3  1000004    政治        100\n",
       "4  1000005    历史        100\n",
       "5  1000006    物理        100\n",
       "6  1000007    化学        100\n",
       "7  1000008    地理        100\n",
       "8  1000009    生物        100"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 读取科目表\n",
    "subjects = pd.read_csv(\"../../data/subject.txt\",\n",
    "                       sep=\",\",\n",
    "                       header=None,\n",
    "                       names=[\"cid\", \"cname\", \"max_score\"],\n",
    "                       encoding=\"utf-8\",\n",
    "                       )\n",
    "subjects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "e06c96b5753754f2",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-08-23T01:42:39.724120Z",
     "start_time": "2024-08-23T01:42:39.708464Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cid</th>\n",
       "      <th>cname</th>\n",
       "      <th>max_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1000001</td>\n",
       "      <td>语文</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       cid cname  max_score\n",
       "0  1000001    语文        150"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 取出语文科目\n",
    "\n",
    "subjects_yw = subjects[subjects[\"cname\"] == \"语文\"]\n",
    "subjects_yw"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "f52ab5bf86d7b46c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-08-23T01:48:36.611160Z",
     "start_time": "2024-08-23T01:48:36.595533Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>cid</th>\n",
       "      <th>score</th>\n",
       "      <th>cname</th>\n",
       "      <th>max_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>1000001</td>\n",
       "      <td>98</td>\n",
       "      <td>语文</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1500100002</td>\n",
       "      <td>1000001</td>\n",
       "      <td>139</td>\n",
       "      <td>语文</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1500100003</td>\n",
       "      <td>1000001</td>\n",
       "      <td>48</td>\n",
       "      <td>语文</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1500100004</td>\n",
       "      <td>1000001</td>\n",
       "      <td>147</td>\n",
       "      <td>语文</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1500100005</td>\n",
       "      <td>1000001</td>\n",
       "      <td>105</td>\n",
       "      <td>语文</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>1500100996</td>\n",
       "      <td>1000001</td>\n",
       "      <td>43</td>\n",
       "      <td>语文</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>1500100997</td>\n",
       "      <td>1000001</td>\n",
       "      <td>113</td>\n",
       "      <td>语文</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>1500100998</td>\n",
       "      <td>1000001</td>\n",
       "      <td>37</td>\n",
       "      <td>语文</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>1500100999</td>\n",
       "      <td>1000001</td>\n",
       "      <td>115</td>\n",
       "      <td>语文</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>1500101000</td>\n",
       "      <td>1000001</td>\n",
       "      <td>100</td>\n",
       "      <td>语文</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             id      cid  score cname  max_score\n",
       "0    1500100001  1000001     98    语文        150\n",
       "1    1500100002  1000001    139    语文        150\n",
       "2    1500100003  1000001     48    语文        150\n",
       "3    1500100004  1000001    147    语文        150\n",
       "4    1500100005  1000001    105    语文        150\n",
       "..          ...      ...    ...   ...        ...\n",
       "995  1500100996  1000001     43    语文        150\n",
       "996  1500100997  1000001    113    语文        150\n",
       "997  1500100998  1000001     37    语文        150\n",
       "998  1500100999  1000001    115    语文        150\n",
       "999  1500101000  1000001    100    语文        150\n",
       "\n",
       "[1000 rows x 5 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# how：表的关联方式，inner join(内关联，两边都有才出来结果)， left join(以左表为基础关联，如果右表没有会补空)\n",
    "# 关联科目，取出所有语文的分数\n",
    "scores_yw = scores.merge(subjects_yw, on=\"cid\")\n",
    "scores_yw"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "c4cae4e85d50fe05",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-08-23T01:52:16.751654Z",
     "start_time": "2024-08-23T01:52:16.736033Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "clazz\n",
       "文科一班    77.597222\n",
       "文科三班    73.127660\n",
       "文科二班    79.356322\n",
       "文科五班    69.773810\n",
       "文科六班    73.836538\n",
       "文科四班    77.716049\n",
       "Name: score, dtype: float64"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 关联语文的分数和文科学生,计算班级平均分\n",
    "student_wk.merge(scores_yw, on=\"id\").groupby(\"clazz\")[\"score\"].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "ac362f92bd5e3418",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-08-23T02:14:24.990749Z",
     "start_time": "2024-08-23T02:14:24.968581Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>clazz</th>\n",
       "      <th>cname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1500100592</td>\n",
       "      <td>严曜栋</td>\n",
       "      <td>理科三班</td>\n",
       "      <td>语文</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1500100592</td>\n",
       "      <td>严曜栋</td>\n",
       "      <td>理科三班</td>\n",
       "      <td>数学</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1500100592</td>\n",
       "      <td>严曜栋</td>\n",
       "      <td>理科三班</td>\n",
       "      <td>英语</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1500100592</td>\n",
       "      <td>严曜栋</td>\n",
       "      <td>理科三班</td>\n",
       "      <td>化学</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1500100592</td>\n",
       "      <td>严曜栋</td>\n",
       "      <td>理科三班</td>\n",
       "      <td>地理</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1500100592</td>\n",
       "      <td>严曜栋</td>\n",
       "      <td>理科三班</td>\n",
       "      <td>生物</td>\n",
       "      <td>94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1500100028</td>\n",
       "      <td>幸浩邈</td>\n",
       "      <td>理科五班</td>\n",
       "      <td>语文</td>\n",
       "      <td>147</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1500100028</td>\n",
       "      <td>幸浩邈</td>\n",
       "      <td>理科五班</td>\n",
       "      <td>数学</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1500100028</td>\n",
       "      <td>幸浩邈</td>\n",
       "      <td>理科五班</td>\n",
       "      <td>英语</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1500100028</td>\n",
       "      <td>幸浩邈</td>\n",
       "      <td>理科五班</td>\n",
       "      <td>化学</td>\n",
       "      <td>86</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1500100028</td>\n",
       "      <td>幸浩邈</td>\n",
       "      <td>理科五班</td>\n",
       "      <td>地理</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>1500100028</td>\n",
       "      <td>幸浩邈</td>\n",
       "      <td>理科五班</td>\n",
       "      <td>生物</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>1500100195</td>\n",
       "      <td>凌昆锐</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>语文</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>1500100195</td>\n",
       "      <td>凌昆锐</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>数学</td>\n",
       "      <td>147</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1500100195</td>\n",
       "      <td>凌昆锐</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>英语</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1500100195</td>\n",
       "      <td>凌昆锐</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>政治</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1500100195</td>\n",
       "      <td>凌昆锐</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>历史</td>\n",
       "      <td>99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1500100195</td>\n",
       "      <td>凌昆锐</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>物理</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>1500100782</td>\n",
       "      <td>罗静珊</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>语文</td>\n",
       "      <td>145</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1500100782</td>\n",
       "      <td>罗静珊</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>数学</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>1500100782</td>\n",
       "      <td>罗静珊</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>英语</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1500100782</td>\n",
       "      <td>罗静珊</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>政治</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1500100782</td>\n",
       "      <td>罗静珊</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>历史</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>1500100782</td>\n",
       "      <td>罗静珊</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>物理</td>\n",
       "      <td>98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>1500100560</td>\n",
       "      <td>卜又琴</td>\n",
       "      <td>理科二班</td>\n",
       "      <td>语文</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>1500100560</td>\n",
       "      <td>卜又琴</td>\n",
       "      <td>理科二班</td>\n",
       "      <td>数学</td>\n",
       "      <td>128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>1500100560</td>\n",
       "      <td>卜又琴</td>\n",
       "      <td>理科二班</td>\n",
       "      <td>英语</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>1500100560</td>\n",
       "      <td>卜又琴</td>\n",
       "      <td>理科二班</td>\n",
       "      <td>化学</td>\n",
       "      <td>94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>1500100560</td>\n",
       "      <td>卜又琴</td>\n",
       "      <td>理科二班</td>\n",
       "      <td>地理</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>1500100560</td>\n",
       "      <td>卜又琴</td>\n",
       "      <td>理科二班</td>\n",
       "      <td>生物</td>\n",
       "      <td>96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>1500100346</td>\n",
       "      <td>杭德昌</td>\n",
       "      <td>理科二班</td>\n",
       "      <td>语文</td>\n",
       "      <td>120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>1500100346</td>\n",
       "      <td>杭德昌</td>\n",
       "      <td>理科二班</td>\n",
       "      <td>数学</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>1500100346</td>\n",
       "      <td>杭德昌</td>\n",
       "      <td>理科二班</td>\n",
       "      <td>英语</td>\n",
       "      <td>143</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>1500100346</td>\n",
       "      <td>杭德昌</td>\n",
       "      <td>理科二班</td>\n",
       "      <td>化学</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>1500100346</td>\n",
       "      <td>杭德昌</td>\n",
       "      <td>理科二班</td>\n",
       "      <td>地理</td>\n",
       "      <td>95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>1500100346</td>\n",
       "      <td>杭德昌</td>\n",
       "      <td>理科二班</td>\n",
       "      <td>生物</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>1500100283</td>\n",
       "      <td>侯千风</td>\n",
       "      <td>文科三班</td>\n",
       "      <td>语文</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>1500100283</td>\n",
       "      <td>侯千风</td>\n",
       "      <td>文科三班</td>\n",
       "      <td>数学</td>\n",
       "      <td>139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>1500100283</td>\n",
       "      <td>侯千风</td>\n",
       "      <td>文科三班</td>\n",
       "      <td>英语</td>\n",
       "      <td>143</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>1500100283</td>\n",
       "      <td>侯千风</td>\n",
       "      <td>文科三班</td>\n",
       "      <td>政治</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>1500100283</td>\n",
       "      <td>侯千风</td>\n",
       "      <td>文科三班</td>\n",
       "      <td>历史</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>1500100283</td>\n",
       "      <td>侯千风</td>\n",
       "      <td>文科三班</td>\n",
       "      <td>物理</td>\n",
       "      <td>89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>1500100091</td>\n",
       "      <td>蔺水风</td>\n",
       "      <td>文科一班</td>\n",
       "      <td>语文</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>1500100091</td>\n",
       "      <td>蔺水风</td>\n",
       "      <td>文科一班</td>\n",
       "      <td>数学</td>\n",
       "      <td>98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>1500100091</td>\n",
       "      <td>蔺水风</td>\n",
       "      <td>文科一班</td>\n",
       "      <td>英语</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>1500100091</td>\n",
       "      <td>蔺水风</td>\n",
       "      <td>文科一班</td>\n",
       "      <td>政治</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>1500100091</td>\n",
       "      <td>蔺水风</td>\n",
       "      <td>文科一班</td>\n",
       "      <td>历史</td>\n",
       "      <td>96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>1500100091</td>\n",
       "      <td>蔺水风</td>\n",
       "      <td>文科一班</td>\n",
       "      <td>物理</td>\n",
       "      <td>96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>1500100971</td>\n",
       "      <td>郝曼凡</td>\n",
       "      <td>文科五班</td>\n",
       "      <td>语文</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>1500100971</td>\n",
       "      <td>郝曼凡</td>\n",
       "      <td>文科五班</td>\n",
       "      <td>数学</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>1500100971</td>\n",
       "      <td>郝曼凡</td>\n",
       "      <td>文科五班</td>\n",
       "      <td>英语</td>\n",
       "      <td>134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>1500100971</td>\n",
       "      <td>郝曼凡</td>\n",
       "      <td>文科五班</td>\n",
       "      <td>政治</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>1500100971</td>\n",
       "      <td>郝曼凡</td>\n",
       "      <td>文科五班</td>\n",
       "      <td>历史</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>1500100971</td>\n",
       "      <td>郝曼凡</td>\n",
       "      <td>文科五班</td>\n",
       "      <td>物理</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>1500100649</td>\n",
       "      <td>班鸿晖</td>\n",
       "      <td>文科三班</td>\n",
       "      <td>语文</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>1500100649</td>\n",
       "      <td>班鸿晖</td>\n",
       "      <td>文科三班</td>\n",
       "      <td>数学</td>\n",
       "      <td>120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>1500100649</td>\n",
       "      <td>班鸿晖</td>\n",
       "      <td>文科三班</td>\n",
       "      <td>英语</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>1500100649</td>\n",
       "      <td>班鸿晖</td>\n",
       "      <td>文科三班</td>\n",
       "      <td>政治</td>\n",
       "      <td>96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>1500100649</td>\n",
       "      <td>班鸿晖</td>\n",
       "      <td>文科三班</td>\n",
       "      <td>历史</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>1500100649</td>\n",
       "      <td>班鸿晖</td>\n",
       "      <td>文科三班</td>\n",
       "      <td>物理</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            id name clazz cname  score\n",
       "0   1500100592  严曜栋  理科三班    语文     13\n",
       "1   1500100592  严曜栋  理科三班    数学    140\n",
       "2   1500100592  严曜栋  理科三班    英语      8\n",
       "3   1500100592  严曜栋  理科三班    化学      3\n",
       "4   1500100592  严曜栋  理科三班    地理      0\n",
       "5   1500100592  严曜栋  理科三班    生物     94\n",
       "6   1500100028  幸浩邈  理科五班    语文    147\n",
       "7   1500100028  幸浩邈  理科五班    数学     12\n",
       "8   1500100028  幸浩邈  理科五班    英语      6\n",
       "9   1500100028  幸浩邈  理科五班    化学     86\n",
       "10  1500100028  幸浩邈  理科五班    地理      2\n",
       "11  1500100028  幸浩邈  理科五班    生物     70\n",
       "12  1500100195  凌昆锐  文科六班    语文     21\n",
       "13  1500100195  凌昆锐  文科六班    数学    147\n",
       "14  1500100195  凌昆锐  文科六班    英语     10\n",
       "15  1500100195  凌昆锐  文科六班    政治      5\n",
       "16  1500100195  凌昆锐  文科六班    历史     99\n",
       "17  1500100195  凌昆锐  文科六班    物理     30\n",
       "18  1500100782  罗静珊  文科六班    语文    145\n",
       "19  1500100782  罗静珊  文科六班    数学     23\n",
       "20  1500100782  罗静珊  文科六班    英语     21\n",
       "21  1500100782  罗静珊  文科六班    政治      8\n",
       "22  1500100782  罗静珊  文科六班    历史      8\n",
       "23  1500100782  罗静珊  文科六班    物理     98\n",
       "24  1500100560  卜又琴  理科二班    语文     17\n",
       "25  1500100560  卜又琴  理科二班    数学    128\n",
       "26  1500100560  卜又琴  理科二班    英语      2\n",
       "27  1500100560  卜又琴  理科二班    化学     94\n",
       "28  1500100560  卜又琴  理科二班    地理     23\n",
       "29  1500100560  卜又琴  理科二班    生物     96\n",
       "30  1500100346  杭德昌  理科二班    语文    120\n",
       "31  1500100346  杭德昌  理科二班    数学      2\n",
       "32  1500100346  杭德昌  理科二班    英语    143\n",
       "33  1500100346  杭德昌  理科二班    化学     22\n",
       "34  1500100346  杭德昌  理科二班    地理     95\n",
       "35  1500100346  杭德昌  理科二班    生物      9\n",
       "36  1500100283  侯千风  文科三班    语文     21\n",
       "37  1500100283  侯千风  文科三班    数学    139\n",
       "38  1500100283  侯千风  文科三班    英语    143\n",
       "39  1500100283  侯千风  文科三班    政治      2\n",
       "40  1500100283  侯千风  文科三班    历史     24\n",
       "41  1500100283  侯千风  文科三班    物理     89\n",
       "42  1500100091  蔺水风  文科一班    语文     23\n",
       "43  1500100091  蔺水风  文科一班    数学     98\n",
       "44  1500100091  蔺水风  文科一班    英语      2\n",
       "45  1500100091  蔺水风  文科一班    政治     13\n",
       "46  1500100091  蔺水风  文科一班    历史     96\n",
       "47  1500100091  蔺水风  文科一班    物理     96\n",
       "48  1500100971  郝曼凡  文科五班    语文     13\n",
       "49  1500100971  郝曼凡  文科五班    数学      3\n",
       "50  1500100971  郝曼凡  文科五班    英语    134\n",
       "51  1500100971  郝曼凡  文科五班    政治     90\n",
       "52  1500100971  郝曼凡  文科五班    历史     19\n",
       "53  1500100971  郝曼凡  文科五班    物理      0\n",
       "54  1500100649  班鸿晖  文科三班    语文     25\n",
       "55  1500100649  班鸿晖  文科三班    数学    120\n",
       "56  1500100649  班鸿晖  文科三班    英语     22\n",
       "57  1500100649  班鸿晖  文科三班    政治     96\n",
       "58  1500100649  班鸿晖  文科三班    历史     90\n",
       "59  1500100649  班鸿晖  文科三班    物理      3"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "# 3、统计偏科最严重的前10个学生【学号。姓名，班级，科目名，分数】（标准差）\n",
    "\n",
    "# 关联分数表和科目表\n",
    "scores_subjects = scores.merge(subjects, on=\"cid\")\n",
    "\n",
    "# 对分数做归一化处理\n",
    "scores_subjects[\"score_one\"] =  scores_subjects[\"score\"] / scores_subjects[\"max_score\"]\n",
    "\n",
    "# 计算每个学生分数的标准差\n",
    "std_score = scores_subjects.groupby(\"id\")[\"score_one\"].std()\n",
    "\n",
    "# 取前十\n",
    "top10_std = std_score.sort_values(ascending=False).head(10).reset_index()\n",
    "\n",
    "# 关联获取分数和科目表\n",
    "top10_score = top10_std.merge(scores_subjects, on=\"id\")[[\"id\",\"cname\",\"score\"]]\n",
    "\n",
    "# 关联获取学生的姓名和班级\n",
    "top10_score.merge(students, on=\"id\")[[\"id\",\"name\",\"clazz\",\"cname\",\"score\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "adc0d05f89b71477",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
